In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import cv2
import math

from keras.applications import resnet50, inception_v3, xception, inception_resnet_v2
from keras.preprocessing import image
from tqdm import tqdm
Using TensorFlow backend.
In [2]:
print(random.choice(os.listdir("../input/train/cat")))
cat.1334.jpg

图片展示

In [2]:
path_cat = "../input/train/cat"
path_dog= "../input/train/dog"

def show_img(img_path_list):
    fig = plt.figure(figsize=(16, 4 * math.ceil(len(img_path_list)/4.0)))
    for i in range(len(img_path_list)):
        img = cv2.imread(img_path_list[i])
        img = img[:,:,::-1]

        ax = fig.add_subplot(math.ceil(len(img_path_list)/4),4,i+1)
        ax.axis('off')
        ax.set_title(img_path_list[i])
        img = cv2.resize(img, (224,224))
        ax.imshow(img)
    
    plt.show()
        
In [5]:
random.seed(21)
show_path_list = random.sample(os.listdir(path_cat),8)
for i in range(len(show_path_list)):
    show_path_list[i] = path_cat+"/"+show_path_list[i]
    
show_img(show_path_list)
In [6]:
random.seed(21)
show_path_list = random.sample(os.listdir(path_dog),8)
for i in range(len(show_path_list)):
    show_path_list[i] = path_dog+"/"+show_path_list[i]
    
show_img(show_path_list)
In [41]:
show_img("../input/train/dog/dog.8110.jpg".split())
In [3]:
# imgNet中的猫狗种类
Dogs = [ 'n02085620','n02085782','n02085936','n02086079','n02086240','n02086646','n02086910','n02087046','n02087394','n02088094','n02088238',
        'n02088364','n02088466','n02088632','n02089078','n02089867','n02089973','n02090379','n02090622','n02090721','n02091032','n02091134',
        'n02091244','n02091467','n02091635','n02091831','n02092002','n02092339','n02093256','n02093428','n02093647','n02093754','n02093859',
        'n02093991','n02094114','n02094258','n02094433','n02095314','n02095570','n02095889','n02096051','n02096177','n02096294','n02096437',
        'n02096585','n02097047','n02097130','n02097209','n02097298','n02097474','n02097658','n02098105','n02098286','n02098413','n02099267',
        'n02099429','n02099601','n02099712','n02099849','n02100236','n02100583','n02100735','n02100877','n02101006','n02101388','n02101556',
        'n02102040','n02102177','n02102318','n02102480','n02102973','n02104029','n02104365','n02105056','n02105162','n02105251','n02105412',
        'n02105505','n02105641','n02105855','n02106030','n02106166','n02106382','n02106550','n02106662','n02107142','n02107312','n02107574',
        'n02107683','n02107908','n02108000','n02108089','n02108422','n02108551','n02108915','n02109047','n02109525','n02109961','n02110063',
        'n02110185','n02110341','n02110627','n02110806','n02110958','n02111129','n02111277','n02111500','n02111889','n02112018','n02112137',
        'n02112350','n02112706','n02113023','n02113186','n02113624','n02113712','n02113799','n02113978']
Cats=['n02123045','n02123159','n02123394','n02123597','n02124075','n02125311','n02127052']
In [4]:
def error_img(model, path, animal):
    error_path=[]
    dir_list = os.listdir(path)
    for i in range(len(dir_list)):
        dir_list[i] = path+"/"+dir_list[i]
        tf_img = image.load_img(dir_list[i], target_size=(224,224))
        
        x = image.img_to_array(tf_img)
        x = np.expand_dims(x, axis=0)
        x = model.preprocess_input(x) 
        
        preds = real_model.predict(x)
        
        a = []
        for z in range(len(model.decode_predictions(preds, top=50)[0])):
            a.append(model.decode_predictions(preds, top=50)[0][z][0])
        
        if not (set(a) & set(animal)):
            show_img(dir_list[i].split())
            error_path.append(dir_list[i])
    
    return error_path
            
        
        
        
In [5]:
real_model = resnet50.ResNet50(weights='imagenet')
cat_error_renet50 = error_img(resnet50, path_cat, Cats)
In [6]:
real_model = inception_v3.InceptionV3(weights='imagenet')
cat_error_inception_v3 = error_img(inception_v3, path_cat, Cats)
In [7]:
real_model = xception.Xception(weights='imagenet')
cat_error_xception = error_img(xception, path_cat, Cats)
In [8]:
real_model = resnet50.ResNet50(weights='imagenet')
dog_error_renet50 = error_img(resnet50, path_dog, Dogs)
In [9]:
real_model = inception_v3.InceptionV3(weights='imagenet')
dog_error_inception_v3 = error_img(inception_v3, path_dog, Dogs)
In [10]:
real_model = xception.Xception(weights='imagenet')
dog_error_xception = error_img(xception, path_dog, Dogs)
In [11]:
cat_error_path = set(cat_error_renet50+cat_error_inception_v3+cat_error_xception)
dog_error_path = set(dog_error_renet50+dog_error_inception_v3+dog_error_xception)
In [17]:
print(len(set(list(cat_error_path)+list(dog_error_path))))
263
In [20]:
(list(cat_error_path)+list(dog_error_path))[0]
Out[20]:
'../input/train/cat/cat.2420.jpg'

清除错误数据

In [27]:
all_error = list(cat_error_path)+list(dog_error_path)
# 谨慎运行
for q in all_error:
    os.remove(str(q))
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-27-3f079c6fcf0a> in <module>
      2 # 谨慎运行
      3 for q in all_error:
----> 4     os.remove(str(q))
      5 
      6 print(len(os.listdir("../input/train/cat")))

FileNotFoundError: [Errno 2] No such file or directory: '../input/train/cat/cat.2420.jpg'
In [29]:
print(len(os.listdir("../input/train/cat")))
print(len(os.listdir("../input/train/dog")))
12271
12466
In [2]:
from keras.models import *
from keras.layers import *
from sklearn.utils import shuffle

#模型保存
import h5py
In [5]:
def save_feature(model_main, size, model_name, model_func = None):
    input_tensor = Input((size[1], size[0], 3))
    x = input_tensor
    if model_func:
        x = Lambda(model_func)(x)
    
    base_model = model_main(input_tensor=x, weights='imagenet', include_top=False)
#     对激活层求平均值防止过拟合
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
    
    gen = image.ImageDataGenerator()
#     返回标签
    train_generator = gen.flow_from_directory("../input/train", size, shuffle = False, batch_size=16)
#     不返回标签
    test_generator = gen.flow_from_directory("../input/test", size, shuffle=False, class_mode=None, batch_size=16)
    
    train = model.predict_generator(train_generator, 1547)
    test = model.predict_generator(test_generator, 782)
#     保存为文件
    with h5py.File("gap_%s.h5"%model_name) as h:
        h.create_dataset("train", data=train)
        h.create_dataset("test", data=test)
        h.create_dataset("label", data=train_generator.classes)
        
save_feature(inception_v3.InceptionV3, (299,299),model_name='InceptionV3m', model_func =inception_v3.preprocess_input)
save_feature(xception.Xception, (299, 299), model_name='Xceptionm', model_func =xception.preprocess_input)
save_feature(inception_resnet_v2.InceptionResNetV2, (299,299), model_name='InceptionResNetV2m', model_func = inception_resnet_v2.preprocess_input)
save_feature(resnet50.ResNet50, (224,224), model_name='ResNet50m')
Found 24737 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
Found 24737 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
Found 24737 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
/usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras_applications/resnet50.py:265: UserWarning: The output shape of `ResNet50(include_top=False)` has been changed since Keras 2.2.0.
  warnings.warn('The output shape of `ResNet50(include_top=False)` '
Found 24737 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
In [26]:
print(len(os.listdir('../input/test/test')))
12500

模型读取

In [3]:
X_train = []
X_test = []

np.random.seed(777)

for filename in ["gap_InceptionResNetV2m.h5", "gap_Xceptionm.h5", "gap_InceptionV3m.h5"]:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        y_train = np.array(h['label'])

X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)


X_train, y_train = shuffle(X_train, y_train)

自定义底部全连接层

  • 二分类神经网络,只有一个节点,使用sigmoid作为激活函数
  • 二分类问题,binary_crossentropy作为损失函数 adam作为优化函数
  • 参考 https://keras.io/zh/models/model/
In [4]:
input_tensor = Input(X_train.shape[1:])
x = Dropout(0.5)(input_tensor)
x = Dense(1, activation='sigmoid')(x)
model = Model(input_tensor, x)

model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['accuracy'])
In [5]:
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 5632)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5632)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5633      
=================================================================
Total params: 5,633
Trainable params: 5,633
Non-trainable params: 0
_________________________________________________________________
In [42]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='catVSdog.weights.best.hdf5', verbose=1, save_best_only=True)
# 每一批64, 20个epoch,将0.2拆分为验证集,显示训练日志
history = model.fit(X_train, y_train, batch_size=128,callbacks=[checkpointer], epochs=20, validation_split=0.2, verbose=1)
# 加载最优模型

model.load_weights('catVSdog.weights.best.hdf5')
Train on 19789 samples, validate on 4948 samples
Epoch 1/20
19789/19789 [==============================] - 2s 80us/step - loss: 0.0032 - acc: 0.9993 - val_loss: 0.0093 - val_acc: 0.9974

Epoch 00001: val_loss improved from inf to 0.00925, saving model to catVSdog.weights.best.hdf5
Epoch 2/20
19789/19789 [==============================] - 1s 71us/step - loss: 0.0034 - acc: 0.9993 - val_loss: 0.0096 - val_acc: 0.9974

Epoch 00002: val_loss did not improve from 0.00925
Epoch 3/20
19789/19789 [==============================] - 2s 80us/step - loss: 0.0033 - acc: 0.9993 - val_loss: 0.0087 - val_acc: 0.9972

Epoch 00003: val_loss improved from 0.00925 to 0.00872, saving model to catVSdog.weights.best.hdf5
Epoch 4/20
19789/19789 [==============================] - 1s 74us/step - loss: 0.0035 - acc: 0.9990 - val_loss: 0.0096 - val_acc: 0.9974

Epoch 00004: val_loss did not improve from 0.00872
Epoch 5/20
19789/19789 [==============================] - 1s 72us/step - loss: 0.0028 - acc: 0.9995 - val_loss: 0.0115 - val_acc: 0.9972

Epoch 00005: val_loss did not improve from 0.00872
Epoch 6/20
19789/19789 [==============================] - 1s 75us/step - loss: 0.0027 - acc: 0.9995 - val_loss: 0.0092 - val_acc: 0.9970

Epoch 00006: val_loss did not improve from 0.00872
Epoch 7/20
19789/19789 [==============================] - 1s 64us/step - loss: 0.0029 - acc: 0.9993 - val_loss: 0.0089 - val_acc: 0.9974

Epoch 00007: val_loss did not improve from 0.00872
Epoch 8/20
19789/19789 [==============================] - 2s 79us/step - loss: 0.0023 - acc: 0.9996 - val_loss: 0.0095 - val_acc: 0.9970

Epoch 00008: val_loss did not improve from 0.00872
Epoch 9/20
19789/19789 [==============================] - 1s 71us/step - loss: 0.0028 - acc: 0.9995 - val_loss: 0.0091 - val_acc: 0.9970

Epoch 00009: val_loss did not improve from 0.00872
Epoch 10/20
19789/19789 [==============================] - 1s 75us/step - loss: 0.0024 - acc: 0.9993 - val_loss: 0.0093 - val_acc: 0.9970

Epoch 00010: val_loss did not improve from 0.00872
Epoch 11/20
19789/19789 [==============================] - 1s 70us/step - loss: 0.0023 - acc: 0.9993 - val_loss: 0.0095 - val_acc: 0.9970

Epoch 00011: val_loss did not improve from 0.00872
Epoch 12/20
19789/19789 [==============================] - 1s 72us/step - loss: 0.0024 - acc: 0.9994 - val_loss: 0.0092 - val_acc: 0.9972

Epoch 00012: val_loss did not improve from 0.00872
Epoch 13/20
19789/19789 [==============================] - 1s 64us/step - loss: 0.0022 - acc: 0.9995 - val_loss: 0.0092 - val_acc: 0.9972

Epoch 00013: val_loss did not improve from 0.00872
Epoch 14/20
19789/19789 [==============================] - 1s 63us/step - loss: 0.0023 - acc: 0.9994 - val_loss: 0.0095 - val_acc: 0.9972

Epoch 00014: val_loss did not improve from 0.00872
Epoch 15/20
19789/19789 [==============================] - 1s 69us/step - loss: 0.0023 - acc: 0.9995 - val_loss: 0.0101 - val_acc: 0.9970

Epoch 00015: val_loss did not improve from 0.00872
Epoch 16/20
19789/19789 [==============================] - 1s 63us/step - loss: 0.0026 - acc: 0.9992 - val_loss: 0.0094 - val_acc: 0.9974

Epoch 00016: val_loss did not improve from 0.00872
Epoch 17/20
19789/19789 [==============================] - 1s 38us/step - loss: 0.0022 - acc: 0.9994 - val_loss: 0.0096 - val_acc: 0.9970

Epoch 00017: val_loss did not improve from 0.00872
Epoch 18/20
19789/19789 [==============================] - 1s 49us/step - loss: 0.0025 - acc: 0.9994 - val_loss: 0.0096 - val_acc: 0.9972

Epoch 00018: val_loss did not improve from 0.00872
Epoch 19/20
19789/19789 [==============================] - 1s 47us/step - loss: 0.0018 - acc: 0.9995 - val_loss: 0.0093 - val_acc: 0.9972

Epoch 00019: val_loss did not improve from 0.00872
Epoch 20/20
19789/19789 [==============================] - 1s 45us/step - loss: 0.0019 - acc: 0.9996 - val_loss: 0.0098 - val_acc: 0.9970

Epoch 00020: val_loss did not improve from 0.00872
In [43]:
print(history.history.keys())
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
In [44]:
import matplotlib.pyplot as plt
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
In [48]:
y_pred = model.predict(X_test, verbose=1)
y_pred = y_pred.clip(min=0.005, max=0.995)

df = pd.read_csv("sample_submission.csv")

gen = image.ImageDataGenerator()
test_generator = gen.flow_from_directory("../input/test", (224, 224), shuffle=False, 
                                         batch_size=16, class_mode=None)

for i, fname in enumerate(test_generator.filenames):
    index = int(fname[fname.rfind('/')+1:fname.rfind('.')])
    df.set_value(index-1, 'label', y_pred[i])

df.to_csv('grade.csv', index=None)
df.head(10)
12500/12500 [==============================] - 1s 67us/step
Found 12500 images belonging to 1 classes.
/usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/ipykernel_launcher.py:12: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead
  if sys.path[0] == '':
Out[48]:
id label
0 1 0.995
1 2 0.995
2 3 0.995
3 4 0.995
4 5 0.005
5 6 0.005
6 7 0.005
7 8 0.005
8 9 0.005
9 10 0.005
In [16]:
def show_end_img(img_path_list,csv):
    fig = plt.figure(figsize=(16, 4 * math.ceil(len(img_path_list)/4.0)))
    for i in range(len(img_path_list)):
        num = int(img_path_list[i])
        pred = csv.loc[int(num-1), 'label']
        path = '../input/test/test/'+str(num)+'.jpg'
        img = cv2.imread(path)
        img = img[:,:,::-1]

        ax = fig.add_subplot(math.ceil(len(img_path_list)/4),4,i+1)
        ax.axis('off')
        ax.set_title(path+'\n'+str(pred))
        img = cv2.resize(img, (224,224))
        ax.imshow(img)
    
    plt.show()
In [17]:
%matplotlib inline
csv = pd.read_csv('pred.csv')
img_np_array = np.random.randint(1, 12500, size=12, dtype='int')
img_list = img_np_array.tolist()
show_end_img(img_list, csv)